MyData <- read.csv(file="diag.csv", header=TRUE, sep=",")
MyData_str<- data.frame(lapply(MyData, as.character), stringsAsFactors=FALSE)
str(MyData_str)
## 'data.frame': 101766 obs. of 4 variables:
## $ patient_nbr: chr "8222157" "55629189" "86047875" "82442376" ...
## $ diag_1 : chr "250.83" "276" "648" "8" ...
## $ diag_2 : chr "?" "250.01" "250" "250.43" ...
## $ diag_3 : chr "?" "255" "V27" "403" ...
head(MyData)
## patient_nbr diag_1 diag_2 diag_3
## 1 8222157 250.83 ? ?
## 2 55629189 276 250.01 255
## 3 86047875 648 250 V27
## 4 82442376 8 250.43 403
## 5 42519267 197 157 250
## 6 82637451 414 411 250
diags<-c(as.character(MyData$diag_1),as.character(MyData$diag_2),as.character(MyData$diag_3))
head(diags)
## [1] "250.83" "276" "648" "8" "197" "414"
print(paste("The length of the vector: ",length(diags)))
## [1] "The length of the vector: 305298"
UniqueDiags<-unique(diags)
print(paste("The number of unique codes: ",length(UniqueDiags)))
## [1] "The number of unique codes: 916"
diagSets=0
for( i in 0:nrow(MyData)){
diagSets[i]=paste0(MyData$diag_1[i],'|',MyData$diag_2[i],'|',MyData$diag_3[i])
}
diagSets[0:10]
## [1] "250.83|?|?" "276|250.01|255" "648|250|V27" "8|250.43|403"
## [5] "197|157|250" "414|411|250" "414|411|V45" "428|492|250"
## [9] "398|427|38" "434|198|486"
print(paste("The number of combinations: ",length(diagSets)))
## [1] "The number of combinations: 101766"
unique_diagSets<-unique(diagSets)
print(paste("The number of unique combinations: ",length(unique_diagSets)))
## [1] "The number of unique combinations: 58166"
for(i in LETTERS){
x<-diagE_unique<-UniqueDiags[startsWith(UniqueDiags,i)]
if(length(x) !=0)
{
print(i)
}
}
## [1] "E"
## [1] "V"
diagV_unique<-UniqueDiags[startsWith(UniqueDiags, "V")]
diagV_unique
## [1] "V57" "V58" "V55" "V53" "V45" "V66" "V56" "V26" "V71" "V54" "V67"
## [12] "V60" "V43" "V63" "V25" "V70" "V07" "V51" "V15" "V10" "V42" "V44"
## [23] "V65" "V12" "V23" "V17" "V72" "V49" "V18" "V14" "V46" "V64" "V61"
## [34] "V08" "V62" "V09" "V11" "V16" "V13" "V85" "V02" "V50" "V03" "V69"
## [45] "V86" "V27" "V22" "V01" "V06"
cat("\n")
print(paste("The number of unique values: ",length(diagV_unique)))
## [1] "The number of unique values: 49"
Matched: V08
Matched only if you add .01: V53,V71,V54,V25,V15,V61
Add 00 to the end of the code: V45,V67,V10,V12,V64,V13
Add 0 to the rest of the code
diagNot<-c("V53","V71","V54","V25","V15","V61")
diag00<-c("V45","V67","V10","V12","V64","V13")
diagNochange<-c("V08")
diagNot0<-c(diagNot,diag00,diagNochange)
diag0<-diagV_unique[!(diagV_unique %in% diagNot0)]
for(i in diag0){
MyData_str[MyData_str==i]<-paste0(i,"0")
}
for(i in diag00){
MyData_str[MyData_str==i]<-paste0(i,"00")
}
nonMatchingCodes<-diagNot
# test the codes
unique(MyData_str$diag_1[startsWith(MyData_str$diag_1, "V")])
## [1] "V570" "V580" "V550" "V53" "V4500" "V660" "V560" "V260"
## [9] "V71" "V54" "V6700" "V600" "V430" "V630" "V25" "V700"
## [17] "V070" "V510"
unique(MyData_str$diag_2[startsWith(MyData_str$diag_2, "V")])
## [1] "V4500" "V15" "V1000" "V420" "V440" "V650" "V1200" "V570"
## [9] "V430" "V230" "V170" "V720" "V580" "V490" "V180" "V140"
## [17] "V660" "V460" "V6400" "V61" "V08" "V53" "V620" "V090"
## [25] "V54" "V110" "V160" "V700" "V1300" "V630" "V850" "V020"
## [33] "V25" "V500" "V030" "V690" "V550" "V860" "V600"
unique(MyData_str$diag_3[startsWith(MyData_str$diag_3, "V")])
## [1] "V270" "V4500" "V430" "V420" "V700" "V1000" "V15" "V110"
## [9] "V140" "V580" "V170" "V1200" "V440" "V25" "V6400" "V090"
## [17] "V180" "V660" "V650" "V570" "V230" "V160" "V460" "V08"
## [25] "V53" "V54" "V490" "V550" "V220" "V020" "V630" "V620"
## [33] "V720" "V600" "V61" "V850" "V1300" "V010" "V860" "V030"
## [41] "V070" "V060"
diagE_unique<-UniqueDiags[startsWith(UniqueDiags, "E")]
diagE_unique
## [1] "E909" "E878" "E812" "E932" "E888" "E939" "E937" "E944" "E870" "E849"
## [11] "E950" "E934" "E935" "E915" "E885" "E880" "E879" "E890" "E817" "E931"
## [21] "E924" "E942" "E947" "E930" "E858" "E929" "E933" "E900" "E936" "E941"
## [31] "E884" "E928" "E965" "E813" "E814" "E927" "E905" "E917" "E868" "E854"
## [41] "E918" "E850" "E887" "E881" "E829" "E919" "E916" "E819" "E826" "E938"
## [51] "E816" "E906" "E818" "E980" "E853" "E968" "E882" "E821" "E945" "E883"
## [61] "E949" "E920" "E956" "E904" "E943" "E861" "E852" "E876" "E855" "E815"
## [71] "E822" "E894" "E828" "E865" "E946" "E966" "E922" "E901" "E892" "E886"
## [81] "E987" "E912" "E955" "E864" "E825"
cat("\n")
print(paste("The number of unique values: ",length(diagE_unique)))
## [1] "The number of unique values: 85"
Matched: E956,E915,E918,E887,E916,E882,E894,E966,E892,E912
Add 0 at the end for the rest of the codes
diagNochange<-c("E956","E915","E918","E887","E916","E882","E894","E966","E892","E912")
diag0<-diagE_unique[!(diagE_unique %in% diagNochange)]
for(i in diag0){
MyData_str[MyData_str==i]<-paste0(i,"0")
}
# test the codes
unique(MyData_str$diag_1[startsWith(MyData_str$diag_1, "E")])
## [1] "E9090"
unique(MyData_str$diag_2[startsWith(MyData_str$diag_2, "E")])
## [1] "E8780" "E8120" "E9320" "E8880" "E9390" "E9370" "E9440" "E8700"
## [9] "E8490" "E9500" "E9340" "E9350" "E915" "E8850" "E8800" "E8790"
## [17] "E8900" "E8170" "E9310" "E9240" "E9420" "E9470" "E9300" "E8580"
## [25] "E9290" "E9330" "E9000" "E9360" "E9410" "E8840" "E9280" "E9650"
## [33] "E8130" "E8140" "E9270" "E9050" "E9170" "E8680" "E8540" "E918"
## [41] "E8500" "E887" "E8810" "E8290" "E9190" "E916" "E8190" "E8260"
## [49] "E9380" "E8160" "E9060" "E8180" "E9800" "E8530" "E9680" "E882"
## [57] "E8210" "E9450" "E8830"
unique(MyData_str$diag_3[startsWith(MyData_str$diag_3, "E")])
## [1] "E8880" "E9320" "E8780" "E8850" "E9340" "E8840" "E9330" "E8790"
## [9] "E9350" "E8490" "E9420" "E9470" "E9500" "E9490" "E9290" "E9380"
## [17] "E8160" "E9200" "E9280" "E9390" "E9310" "E9300" "E956" "E9450"
## [25] "E8170" "E9040" "E8700" "E9800" "E9430" "E8610" "E8500" "E9440"
## [33] "E8800" "E8830" "E9360" "E9050" "E9410" "E8520" "E887" "E8580"
## [41] "E9270" "E8760" "E9370" "E9170" "E8120" "E9240" "E916" "E8190"
## [49] "E9650" "E8550" "E9060" "E9190" "E8150" "E8810" "E882" "E8220"
## [57] "E8260" "E8130" "E8180" "E894" "E915" "E8530" "E8280" "E8650"
## [65] "E9460" "E966" "E9220" "E9010" "E892" "E8860" "E9870" "E912"
## [73] "E9550" "E8640" "E8250" "E9000" "E8540"
lessThan2<-diags[nchar(diags)==1]
unique(lessThan2)
## [1] "8" "?" "3" "7" "5" "9"
cat("\n")
print(paste("The number of unique values: ",length(lessThan2)))
## [1] "The number of unique values: 2797"
na<-lessThan2[startsWith(lessThan2, "?")]
cat("\n")
print(paste("The number of elements contain '?' : ",length(na)))
## [1] "The number of elements contain '?' : 1802"
Add double zero 8 = 00800
all the rest add 00 before and 0 after
MyData_str[MyData_str=="?"]<-""
head(MyData_str)
## patient_nbr diag_1 diag_2 diag_3
## 1 8222157 250.83
## 2 55629189 276 250.01 255
## 3 86047875 648 250 V270
## 4 82442376 8 250.43 403
## 5 42519267 197 157 250
## 6 82637451 414 411 250
diag0<-c("3","7","5","9")
for(i in diag0){
MyData_str[MyData_str==i]<-paste0("00",i,"0")
}
MyData_str[MyData_str=="8"]<-paste0("00","8","00")
# test the codes
unique(MyData_str$diag_1[startsWith(MyData_str$diag_1, "00")])
## [1] "00800" "0030" "0070" "0050" "0090"
unique(MyData_str$diag_2[startsWith(MyData_str$diag_2, "00")])
## [1] "00800" "0090" "0050" "0070"
unique(MyData_str$diag_3[startsWith(MyData_str$diag_3, "00")])
## [1] "00800" "0090" "0050" "0070" "0030"
lessThan3<-diags[nchar(diags)==2]
ulessThan3<-unique(lessThan3)
ulessThan3
## [1] "38" "70" "82" "54" "78" "79" "41" "53" "49" "11" "47" "42" "39" "48"
## [15] "94" "35" "34" "88" "31" "75" "27" "61" "36" "57" "97" "66" "98" "52"
## [29] "84" "23" "58" "10" "40" "46" "96" "99" "17" "14"
cat("\n")
print(paste("The number of unique values: ",length(ulessThan3)))
## [1] "The number of unique values: 38"
No Match: 58
Add 0 before and 00 after: 41,11,10,17,14
Add 0 before: 42,48,35,75,61,96
Add 0 before and 0 after for the rest of the code
diagNot<-c("58")
diag0Before00<-as.character(c(41,11,10,17,14))
diag0Before<-as.character(c(42,48,35,75,61,96))
diagNot0<-c(diagNot,diag0Before00,diag0Before)
diag0<-ulessThan3[!(ulessThan3 %in% diagNot0)]
for(i in diag0){
MyData_str[MyData_str==i]<-paste0("0",i,"0")
}
for(i in diag0Before00){
MyData_str[MyData_str==i]<-paste0("0",i,"00")
}
for(i in diag0Before){
MyData_str[MyData_str==i]<-paste0("0",i)
}
nonMatchingCodes<-c(nonMatchingCodes,diagNot)
# test the codes
unique(MyData_str$diag_1[startsWith(MyData_str$diag_1, "01")])
## [1] "01100" "01000"
unique(MyData_str$diag_2[startsWith(MyData_str$diag_2, "03")])
## [1] "0380" "0340" "0310" "035"
unique(MyData_str$diag_3[startsWith(MyData_str$diag_3, "04")])
## [1] "04100" "042" "0470" "0490"
lessThan4<-diags[nchar(diags)==3]
lessThan4<-lessThan4[!startsWith(lessThan4, "V")]
unique(lessThan4)
## [1] "276" "648" "197" "414" "428" "398" "434" "157" "518" "999" "410"
## [12] "682" "402" "737" "572" "189" "786" "427" "996" "277" "584" "462"
## [23] "473" "411" "174" "486" "998" "511" "432" "626" "295" "196" "618"
## [34] "182" "845" "423" "808" "722" "403" "784" "707" "440" "151" "715"
## [45] "997" "198" "564" "812" "590" "556" "578" "433" "569" "185" "536"
## [56] "255" "599" "558" "574" "491" "560" "244" "577" "730" "188" "824"
## [67] "332" "562" "291" "296" "510" "401" "263" "438" "493" "642" "625"
## [78] "571" "738" "593" "807" "456" "446" "575" "820" "515" "780" "995"
## [89] "235" "721" "787" "162" "724" "282" "514" "281" "530" "466" "435"
## [100] "789" "566" "822" "191" "557" "733" "455" "711" "482" "202" "280"
## [111] "553" "225" "154" "441" "349" "962" "592" "507" "386" "156" "200"
## [122] "728" "348" "459" "426" "388" "607" "337" "531" "596" "288" "656"
## [133] "573" "492" "220" "516" "210" "922" "286" "885" "958" "661" "969"
## [144] "227" "112" "404" "823" "532" "416" "346" "535" "453" "250" "595"
## [155] "211" "303" "852" "218" "782" "540" "457" "285" "431" "340" "550"
## [166] "351" "601" "723" "555" "153" "443" "380" "204" "424" "241" "358"
## [177] "694" "331" "345" "681" "447" "290" "158" "579" "436" "335" "309"
## [188] "654" "805" "799" "292" "183" "851" "458" "586" "311" "892" "305"
## [199] "293" "415" "591" "794" "803" "655" "429" "278" "658" "598" "729"
## [210] "585" "444" "604" "727" "214" "552" "284" "680" "708" "644" "481"
## [221] "821" "413" "437" "968" "756" "632" "359" "275" "512" "781" "420"
## [232] "368" "522" "294" "825" "135" "304" "320" "669" "868" "496" "826"
## [243] "567" "203" "251" "565" "161" "495" "297" "663" "576" "355" "850"
## [254] "287" "611" "840" "350" "726" "537" "620" "180" "366" "783" "751"
## [265] "716" "199" "464" "580" "836" "664" "283" "813" "966" "289" "965"
## [276] "184" "480" "608" "333" "972" "212" "117" "788" "924" "959" "621"
## [287] "238" "785" "714" "942" "710" "933" "508" "478" "844" "736" "233"
## [298] "397" "395" "201" "421" "253" "600" "494" "977" "659" "312" "614"
## [309] "647" "652" "646" "274" "861" "425" "527" "451" "485" "217" "442"
## [320] "970" "193" "160" "322" "581" "475" "623" "374" "582" "568" "465"
## [331] "801" "237" "376" "150" "461" "913" "226" "617" "987" "641" "298"
## [342] "790" "336" "362" "228" "513" "383" "746" "353" "911" "506" "873"
## [353] "155" "860" "534" "802" "141" "396" "310" "341" "242" "719" "239"
## [364] "533" "616" "519" "301" "989" "230" "385" "300" "853" "871" "570"
## [375] "848" "463" "934" "236" "361" "594" "501" "810" "643" "430" "528"
## [386] "205" "791" "983" "992" "490" "172" "171" "622" "306" "863" "864"
## [397] "474" "660" "759" "356" "634" "967" "551" "695" "187" "732" "747"
## [408] "323" "308" "370" "252" "152" "846" "164" "365" "718" "266" "720"
## [419] "344" "797" "170" "878" "904" "882" "843" "709" "973" "454" "686"
## [430] "939" "487" "229" "991" "483" "357" "692" "796" "693" "935" "936"
## [441] "800" "920" "261" "307" "262" "831" "145" "223" "839" "685" "179"
## [452] "964" "136" "324" "389" "815" "334" "143" "526" "588" "192" "394"
## [463] "917" "219" "325" "792" "717" "994" "990" "793" "207" "637" "195"
## [474] "373" "847" "827" "891" "814" "703" "865" "352" "627" "378" "342"
## [485] "886" "369" "745" "705" "816" "541" "986" "610" "633" "640" "753"
## [496] "173" "835" "379" "445" "272" "382" "945" "619" "881" "866" "405"
## [507] "916" "215" "893" "671" "928" "906" "897" "725" "867" "115" "890"
## [518] "734" "521" "674" "470" "834" "146" "696" "524" "980" "691" "384"
## [529] "142" "879" "246" "208" "448" "955" "653" "149" "245" "735" "883"
## [540] "854" "952" "838" "194" "163" "216" "147" "354" "477" "318" "880"
## [551] "921" "377" "471" "683" "175" "602" "982" "706" "375" "417" "131"
## [562] "347" "870" "148" "862" "817" "914" "360" "684" "314" "240" "915"
## [573] "971" "795" "988" "452" "963" "327" "731" "842" "645" "665" "110"
## [584] "944" "603" "923" "412" "363" "957" "976" "698" "299" "700" "273"
## [595] "974" "529" "605" "941" "806" "271" "837" "657" "895" "338" "523"
## [606] "542" "114" "543" "372" "583" "422" "615" "279" "500" "903" "919"
## [617] "875" "381" "804" "704" "649" "832" "133" "975" "833" "391" "690"
## [628] "319" "258" "910" "317" "484" "138" "343" "758" "701" "872" "905"
## [639] "752" "909" "918" "947" "520" "517" "912" "702" "111" "259" "953"
## [650] "712" "741" "713" "755" "742" "869" "907" "908" "472" "811" "137"
## [661] "754" "130" "269" "232" "316" "748" "256" "186" "948" "750" "302"
## [672] "140" "670" "268" "894" "260" "270" "460" "364" "123" "884" "927"
## [683] "525" "315" "139" "313" "122" "387" "951" "697" "943" "744" "243"
## [694] "956" "265" "597" "930" "132" "757" "624" "841" "877" "538" "876"
cat("\n")
print(paste("The number of unique values: ",length(unique(lessThan4))))
## [1] "The number of unique values: 704"
lessThan41<-lessThan4[startsWith(lessThan4, "1")]
ulessThan41<-unique(lessThan41)
ulessThan41
## [1] "197" "157" "189" "174" "196" "182" "151" "198" "185" "188" "162"
## [12] "191" "154" "156" "112" "153" "158" "183" "135" "161" "180" "199"
## [23] "184" "117" "193" "160" "150" "155" "141" "172" "171" "187" "152"
## [34] "164" "170" "145" "179" "136" "143" "192" "195" "173" "115" "146"
## [45] "142" "149" "194" "163" "147" "175" "131" "148" "110" "114" "133"
## [56] "138" "111" "137" "130" "186" "140" "123" "139" "122" "132"
cat("\n")
print(paste("The number of unique values: ",length(ulessThan41)))
## [1] "The number of unique values: 65"
No Match: 187
Matched: 185,135,193,179,138
Add 00 at the end of the code: 173,115,131
Add 0 at the end to rest of codes
diagNot<-c("187")
diag00<-c("173","115","131")
diagNochange<-c("185","135","193","179","138")
diagNot0<-c(diagNot,diag00,diagNochange)
diag0<-ulessThan41[!(ulessThan41 %in% diagNot0)]
for(i in diag0){
MyData_str[MyData_str==i]<-paste0(i,"0")
}
for(i in diag00){
MyData_str[MyData_str==i]<-paste0(i,"00")
}
nonMatchingCodes<-c(nonMatchingCodes,diagNot)
# test the codes
unique(MyData_str$diag_1[startsWith(MyData_str$diag_1, "1")])
## [1] "1970" "1570" "1890" "1740" "1960" "1820" "1510" "1980"
## [9] "185" "1880" "1620" "1910" "1540" "1560" "1120" "1530"
## [17] "1580" "1830" "135" "1610" "1800" "1990" "1840" "1170"
## [25] "193" "1600" "1500" "1550" "1410" "1720" "1710" "187"
## [33] "1520" "1640" "1700" "1450" "179" "1360" "1430" "1920"
## [41] "1950" "17300" "11500" "1460" "1420" "1490" "1940" "1630"
## [49] "1470" "1750" "13100" "1480" "1100" "1140" "1330"
unique(MyData_str$diag_2[startsWith(MyData_str$diag_2, "1")])
## [1] "1570" "1980" "1740" "135" "1960" "1970" "1500" "1530"
## [9] "1120" "1620" "1540" "1170" "179" "1890" "1550" "1720"
## [17] "1990" "1910" "1360" "1510" "185" "138" "13100" "1100"
## [25] "1880" "1820" "1110" "1560" "193" "1520" "17300" "1370"
## [33] "1300" "1830" "1630" "1710" "1640" "1860" "1450" "1920"
## [41] "11500" "1410" "1400" "1950" "1140" "1800" "1230"
unique(MyData_str$diag_3[startsWith(MyData_str$diag_3, "1")])
## [1] "1970" "13100" "1960" "1820" "1620" "1980" "1530" "1120"
## [9] "1550" "138" "135" "1390" "185" "1170" "1990" "1220"
## [17] "1360" "1570" "1740" "1500" "1100" "1890" "1510" "1910"
## [25] "1880" "1700" "1800" "1580" "1720" "17300" "193" "1540"
## [33] "1610" "11500" "179" "1630" "1520" "1830" "1640" "1950"
## [41] "1710" "1560" "1920" "1860" "1460" "1320" "1480" "1110"
## [49] "1230" "1410" "1750"
lessThan42<-lessThan4[startsWith(lessThan4, "2")]
ulessThan42<-unique(lessThan42)
ulessThan42
## [1] "276" "277" "295" "255" "244" "291" "296" "263" "235" "282" "281"
## [12] "202" "280" "225" "200" "288" "220" "210" "286" "227" "250" "211"
## [23] "218" "285" "204" "241" "290" "292" "293" "278" "214" "284" "275"
## [34] "294" "203" "251" "297" "287" "283" "289" "212" "238" "233" "201"
## [45] "253" "274" "217" "237" "226" "298" "228" "242" "239" "230" "236"
## [56] "205" "252" "266" "229" "261" "262" "223" "219" "207" "272" "215"
## [67] "246" "208" "245" "216" "240" "299" "273" "271" "279" "258" "259"
## [78] "269" "232" "256" "268" "260" "270" "243" "265"
cat("\n")
print(paste("The number of unique values: ",length(ulessThan42)))
## [1] "The number of unique values: 85"
No Match: 284,275,258,260,243
Matched: 220,217,226,261,262
Add 00 at the end of the code: 277,295,296,202,200,288,250,204,278,203,201,274,228,242,205,250,207,208,299,279
Add 0 at the end to rest of codes
diagNot<-c("284","275","258","260","243")
diag00<-c("277","295","296","202","200","288","250","204","278","203","201","274","228","242","205","250","207","208","299")
diagNochange<-c("220","217","226","261","262")
diagNot0<-c(diagNot,diag00,diagNochange)
diag0<-ulessThan42[!(ulessThan42 %in% diagNot0)]
for(i in diag0){
MyData_str[MyData_str==i]<-paste0(i,"0")
}
for(i in diag00){
MyData_str[MyData_str==i]<-paste0(i,"00")
}
nonMatchingCodes<-c(nonMatchingCodes,diagNot)
# test the codes
unique(MyData_str$diag_1[startsWith(MyData_str$diag_1, "2")])
## [1] "250.83" "2760" "250.7" "27700" "29500" "250.6" "250.4"
## [8] "250.11" "250.32" "2550" "250.13" "2440" "250.03" "250.8"
## [15] "2910" "29600" "2630" "250.02" "250.42" "250.41" "250.22"
## [22] "2350" "250.82" "2820" "2810" "250.33" "250.12" "20200"
## [29] "2800" "2250" "250.81" "20000" "28800" "220" "2100"
## [36] "2860" "250.93" "2270" "25000" "2110" "250.01" "2180"
## [43] "2850" "20400" "2410" "2900" "2920" "2930" "27800"
## [50] "2140" "284" "275" "2940" "250.31" "250.43" "20300"
## [57] "2510" "250.1" "2970" "2870" "250.2" "250.3" "2830"
## [64] "2890" "2120" "2380" "250.23" "2330" "250.5" "20100"
## [71] "2530" "250.92" "27400" "217" "250.53" "2370" "226"
## [78] "2980" "22800" "24200" "2390" "2300" "250.21" "2360"
## [85] "20500" "2520" "2660" "2290" "261" "262" "250.9"
## [92] "2230" "2190" "20700" "2720" "250.52" "2150" "250.51"
## [99] "2460" "20800" "2450" "2160" "250.91" "2400" "29900"
## [106] "2730" "2710" "2790"
unique(MyData_str$diag_2[startsWith(MyData_str$diag_2, "2")])
## [1] "250.01" "25000" "250.43" "28800" "250.02" "2440" "2760"
## [8] "2860" "24200" "250.03" "250.52" "2850" "250.6" "22800"
## [15] "250.82" "2940" "250.51" "2800" "27700" "250.42" "27800"
## [22] "2720" "20300" "250.41" "250.13" "2930" "2450" "250.12"
## [29] "250.53" "284" "2920" "2900" "250.93" "2550" "250.7"
## [36] "2870" "20000" "250.83" "250.11" "2830" "250.81" "29500"
## [43] "20400" "2710" "2270" "250.5" "258" "2530" "250.91"
## [50] "250.92" "250.4" "20500" "2110" "2630" "20200" "250.23"
## [57] "20100" "2970" "2180" "220" "250.22" "250.8" "2790"
## [64] "2810" "20800" "29600" "2890" "2230" "2910" "217"
## [71] "2980" "2250" "250.9" "2330" "275" "250.1" "27400"
## [78] "2730" "2410" "2820" "250.2" "2140" "29900" "250.31"
## [85] "2520" "2590" "2390" "226" "250.33" "250.32" "2510"
## [92] "2380" "261" "250.21" "2460" "2400" "2660" "2690"
## [99] "2320" "2560" "262" "250.3" "2120" "2150" "2350"
## [106] "2680" "260" "2700"
unique(MyData_str$diag_3[startsWith(MyData_str$diag_3, "2")])
## [1] "2550" "25000" "250.6" "250.01" "2630" "250.42" "2760"
## [8] "250.41" "27800" "250.02" "250.8" "250.7" "250.52" "250.82"
## [15] "250.03" "29600" "250.4" "2440" "250.51" "2800" "2720"
## [22] "250.43" "250.92" "284" "2850" "20100" "2920" "2940"
## [29] "250.23" "250.53" "2820" "2870" "2180" "28800" "29500"
## [36] "2810" "250.83" "2910" "250.5" "250.91" "2380" "2930"
## [43] "2900" "250.13" "250.12" "2860" "2520" "258" "250.81"
## [50] "20200" "250.9" "2890" "24200" "2530" "250.22" "20300"
## [57] "250.93" "275" "2110" "2790" "27400" "2270" "20800"
## [64] "22800" "2400" "2700" "2660" "2450" "2830" "2510"
## [71] "250.2" "2730" "27700" "2460" "2980" "2590" "20400"
## [78] "250.1" "20500" "20000" "2350" "2410" "260" "2390"
## [85] "261" "250.11" "2710" "2140" "220" "2560" "243"
## [92] "2970" "29900" "2250" "2330" "250.3" "262" "2650"
## [99] "2160" "250.21" "250.31" "226" "2680" "2230" "2360"
## [106] "217" "2150" "2300"
lessThan43<-lessThan4[startsWith(lessThan4, "3")]
ulessThan43<-unique(lessThan43)
ulessThan43
## [1] "398" "332" "349" "386" "348" "388" "337" "346" "303" "340" "351"
## [12] "380" "358" "331" "345" "335" "309" "311" "305" "359" "368" "304"
## [23] "320" "355" "350" "366" "333" "397" "395" "312" "322" "374" "376"
## [34] "336" "362" "383" "353" "396" "310" "341" "301" "385" "300" "361"
## [45] "306" "356" "323" "308" "370" "365" "344" "357" "307" "324" "389"
## [56] "334" "394" "325" "373" "352" "378" "342" "369" "379" "382" "384"
## [67] "354" "318" "377" "375" "347" "360" "314" "327" "363" "338" "372"
## [78] "381" "391" "319" "317" "343" "316" "302" "364" "315" "313" "387"
cat("\n")
print(paste("The number of unique values: ",length(ulessThan43)))
## [1] "The number of unique values: 88"
No Match: 350,312,362,323
Matched: 340,311,395,319,317,316
Add 00 at end of the code: 386,388,337,346,303,380,358,345,305,368,304,366,374,376,383,385,300,361,370,365,344,389,373,378,342,369,379,382,384,377,375,347,360,314,327,363,372,381,364,315
Add 0 at the end to rest of codes
diagNot<-as.character(c(350,312,362,323))
diag00<-as.character(c(386,388,337,346,303,380,358,345,305,368,304,366,374,376,383,385,300,361,370,365,344,389,373,378,342,369,379,382,384,377,375,347,360,314,327,363,372,381,364,315))
diagNochange<-as.character(c(340,311,395,319,317,316))
diagNot0<-c(diagNot,diag00,diagNochange)
diag0<-ulessThan43[!(ulessThan43 %in% diagNot0)]
for(i in diag0){
MyData_str[MyData_str==i]<-paste0(i,"0")
}
for(i in diag00){
MyData_str[MyData_str==i]<-paste0(i,"00")
}
nonMatchingCodes<-c(nonMatchingCodes,diagNot)
# test the codes
unique(MyData_str$diag_1[startsWith(MyData_str$diag_1, "3")])
## [1] "3980" "3320" "3490" "38600" "3480" "38800" "33700" "34600"
## [9] "30300" "340" "3510" "38000" "35800" "3310" "34500" "3350"
## [17] "3090" "311" "30500" "3590" "36800" "30400" "3200" "3550"
## [25] "350" "36600" "3330" "3970" "395" "312" "3220" "37400"
## [33] "37600" "3360" "362" "38300" "3530" "3960" "3100" "3410"
## [41] "3010" "38500" "30000" "36100" "3060" "3560" "323" "3080"
## [49] "37000" "36500" "34400" "3570" "3070" "3240" "38900" "3340"
## [57] "3940" "3250" "37300" "3520" "37800" "34200" "36900" "37900"
## [65] "38200" "38400" "3540" "3180" "37700" "37500" "34700" "36000"
## [73] "31400" "32700" "36300" "3380" "37200" "38100" "3910"
unique(MyData_str$diag_2[startsWith(MyData_str$diag_2, "3")])
## [1] "3570" "3960" "340" "30500" "34200" "30300" "3980" "319"
## [9] "30000" "30400" "37200" "38200" "36800" "3090" "3330" "34500"
## [17] "3970" "3010" "3540" "35800" "3310" "31400" "37800" "33700"
## [25] "311" "3480" "317" "3320" "36900" "3490" "38600" "34400"
## [33] "37700" "3520" "3360" "362" "3430" "38000" "3590" "3240"
## [41] "3940" "38300" "38900" "36500" "3510" "3060" "34600" "3560"
## [49] "3220" "3410" "3350" "3200" "3080" "37300" "323" "3070"
## [57] "3250" "312" "3550" "38100" "3100" "37900" "37600" "36600"
## [65] "32700" "316" "34700" "37400" "395" "3180" "3530" "36000"
## [73] "3380" "3020" "350" "38800" "36400"
unique(MyData_str$diag_3[startsWith(MyData_str$diag_3, "3")])
## [1] "30500" "30300" "3320" "362" "3560" "3570" "3480"
## [8] "3970" "3310" "3490" "31500" "38200" "30000" "3960"
## [15] "30400" "33700" "311" "38600" "34200" "3070" "319"
## [22] "34600" "38000" "3090" "34400" "3940" "3010" "36500"
## [29] "3340" "3980" "36800" "34500" "3130" "37900" "37300"
## [36] "340" "3510" "3360" "38900" "36600" "3100" "3870"
## [43] "3550" "37200" "36900" "3430" "31400" "3540" "3330"
## [50] "37800" "38300" "35800" "3410" "323" "3080" "3180"
## [57] "350" "317" "36000" "3590" "3350" "3060" "395"
## [64] "38400" "37700" "34700" "312" "38500" "37400" "32700"
## [71] "37600" "36100" "365.44" "38100" "3530" "3380" "38800"
## [78] "3910" "37000"
lessThan44<-lessThan4[startsWith(lessThan4, "4")]
ulessThan44<-unique(lessThan44)
ulessThan44
## [1] "414" "428" "434" "410" "402" "427" "462" "473" "411" "486" "432"
## [12] "423" "403" "440" "433" "491" "401" "438" "493" "456" "446" "466"
## [23] "435" "455" "482" "441" "459" "426" "492" "404" "416" "453" "457"
## [34] "431" "443" "424" "447" "436" "458" "415" "429" "444" "481" "413"
## [45] "437" "420" "496" "495" "464" "480" "478" "421" "494" "425" "451"
## [56] "485" "442" "475" "465" "461" "463" "430" "490" "474" "454" "487"
## [67] "483" "445" "405" "470" "448" "477" "471" "417" "452" "412" "422"
## [78] "484" "472" "460"
cat("\n")
print(paste("The number of unique values: ",length(ulessThan44)))
## [1] "The number of unique values: 80"
No Match: 444,445,405,484
Matched: 462,431,481,496,485,475,463,430,490,470,452,412,460
Add 00 at the end of the code: 414,434,410,402,486,433,493,441,404,436,464,474
Add 0 at the end to rest of codes
diagNot<-as.character(c(444,445,405,484))
diagNochange<-as.character(c(462,431,481,496,485,475,463,430,490,470,452,412,460))
diag00<-as.character(c(414,434,410,402,486,433,493,441,404,436,464,474))
diagNot0<-c(diagNot,diag00,diagNochange)
diag0<-ulessThan44[!(ulessThan44 %in% diagNot0)]
for(i in diag0){
MyData_str[MyData_str==i]<-paste0(i,"0")
}
for(i in diag00){
MyData_str[MyData_str==i]<-paste0(i,"00")
}
nonMatchingCodes<-c(nonMatchingCodes,diagNot)
# test the codes
unique(MyData_str$diag_1[startsWith(MyData_str$diag_1, "4")])
## [1] "41400" "4280" "43400" "41000" "40200" "4270" "462" "4730"
## [9] "4110" "48600" "4320" "4230" "4030" "4400" "43300" "4910"
## [17] "4010" "4380" "49300" "4560" "4460" "4660" "4350" "4550"
## [25] "4820" "44100" "4590" "4260" "4920" "40400" "4160" "4530"
## [33] "4570" "431" "4430" "4240" "4470" "43600" "4580" "4150"
## [41] "4290" "444" "481" "4130" "4370" "4200" "496" "4950"
## [49] "46400" "4800" "4780" "4210" "4940" "4250" "4510" "485"
## [57] "4420" "475" "4650" "4610" "463" "430" "490" "47400"
## [65] "4540" "4870" "4830" "445" "405" "470" "4480" "4770"
## [73] "4710" "4170" "452" "412" "4220"
unique(MyData_str$diag_2[startsWith(MyData_str$diag_2, "4")])
## [1] "4110" "4920" "4270" "4030" "4250" "4560" "4010" "496"
## [9] "4280" "41000" "4240" "4910" "4400" "49300" "41400" "4130"
## [17] "48600" "444" "4260" "462" "40200" "4580" "4150" "4350"
## [25] "4460" "4470" "4160" "412" "44100" "4820" "4320" "4660"
## [33] "43400" "4590" "4200" "43300" "405" "4370" "4530" "4510"
## [41] "43600" "4780" "40400" "470" "4430" "4230" "4650" "481"
## [49] "4800" "4380" "4730" "490" "485" "431" "4550" "484"
## [57] "4290" "452" "4480" "4210" "4940" "430" "4870" "4570"
## [65] "4610" "4830" "4540" "47400" "4420" "4720" "46400" "4220"
## [73] "463" "475" "4950" "4770" "460"
unique(MyData_str$diag_3[startsWith(MyData_str$diag_3, "4")])
## [1] "4030" "48600" "4270" "41400" "4160" "4280" "4820" "4010"
## [9] "496" "4240" "4110" "490" "4910" "4200" "49300" "4250"
## [17] "4920" "4380" "40400" "4400" "4260" "4610" "4580" "4130"
## [25] "4730" "4550" "4590" "4370" "4530" "40200" "4650" "412"
## [33] "43300" "4430" "41000" "4150" "43600" "44100" "4460" "444"
## [41] "4350" "4660" "4780" "4290" "4420" "4870" "4560" "4800"
## [49] "4230" "43400" "4940" "4470" "405" "4540" "4510" "4950"
## [57] "4720" "452" "4320" "4770" "462" "4570" "46400" "445"
## [65] "481" "4210" "470" "460" "4170" "485" "431" "4830"
## [73] "475" "430" "484" "463" "4480"
lessThan45<-lessThan4[startsWith(lessThan4, "5")]
ulessThan45<-unique(lessThan45)
ulessThan45
## [1] "518" "572" "584" "511" "564" "590" "556" "578" "569" "536" "599"
## [12] "558" "574" "560" "577" "562" "510" "571" "593" "575" "515" "514"
## [23] "530" "566" "557" "553" "592" "507" "531" "596" "573" "516" "532"
## [34] "535" "595" "540" "550" "555" "579" "586" "591" "598" "585" "552"
## [45] "512" "522" "567" "565" "576" "537" "580" "508" "527" "581" "582"
## [56] "568" "513" "506" "534" "533" "519" "570" "594" "501" "528" "551"
## [67] "526" "588" "541" "521" "524" "529" "523" "542" "543" "583" "500"
## [78] "520" "517" "525" "597" "538"
cat("\n")
print(paste("The number of unique values: ",length(ulessThan45)))
## [1] "The number of unique values: 82"
No match: 584,558,585,517
Matched: 515,514,566,586,591,570,501,541,542,500,538
Add 00 at the end of the code: 564,590,574,562,553,531,532,535,550,598,552,534,533,519,528,551,521,524,523
Add 0 at the end to rest of codes
diagNot<-as.character(c(584,558,585,517))
diagNochange<-as.character(c(515,514,566,586,591,570,501,541,542,500,538))
diag00<-as.character(c(564,590,574,562,553,531,532,535,550,598,552,534,533,519,528,551,521,524,523))
diagNot0<-c(diagNot,diag00,diagNochange)
diag0<-ulessThan45[!(ulessThan45 %in% diagNot0)]
for(i in diag0){
MyData_str[MyData_str==i]<-paste0(i,"0")
}
for(i in diag00){
MyData_str[MyData_str==i]<-paste0(i,"00")
}
# test the codes
nonMatchingCodes<-c(nonMatchingCodes,diagNot)
unique(MyData_str$diag_1[startsWith(MyData_str$diag_1, "5")])
## [1] "5180" "5720" "584" "5110" "56400" "59000" "5560" "5780"
## [9] "5690" "5360" "5990" "558" "57400" "5600" "5770" "56200"
## [17] "5100" "5710" "5930" "5750" "515" "514" "5300" "566"
## [25] "5570" "55300" "5920" "5070" "53100" "5960" "5730" "5160"
## [33] "53200" "53500" "5950" "5400" "55000" "5550" "5790" "586"
## [41] "591" "59800" "585" "55200" "5120" "5220" "5670" "5650"
## [49] "5760" "5370" "5800" "5080" "5270" "5810" "5820" "5680"
## [57] "5130" "5060" "53400" "53300" "51900" "570" "5940" "501"
## [65] "52800" "55100" "5260" "5880" "541" "52100" "52400" "5290"
## [73] "52300" "542" "5430" "5830" "500" "58"
unique(MyData_str$diag_2[startsWith(MyData_str$diag_2, "5")])
## [1] "5070" "585" "5710" "5990" "55300" "5110" "5770" "53500"
## [9] "5180" "566" "57400" "5810" "5300" "5670" "5120" "5600"
## [17] "5780" "584" "558" "5730" "5690" "5360" "56200" "591"
## [25] "5750" "515" "5720" "5760" "5950" "59000" "51900" "5370"
## [33] "5680" "5830" "56400" "5960" "53100" "5080" "5130" "55000"
## [41] "5100" "5920" "59800" "5790" "570" "5930" "53200" "5570"
## [49] "5800" "5650" "5560" "55200" "5200" "517" "5160" "5940"
## [57] "52800" "501" "5220" "5060" "5880" "53300" "586" "5550"
## [65] "5430" "5400" "52100" "52400" "514" "500" "5270" "542"
## [73] "53400" "5290" "52300"
unique(MyData_str$diag_3[startsWith(MyData_str$diag_3, "5")])
## [1] "5820" "585" "5680" "5990" "5180" "55300" "53100" "5110"
## [9] "56200" "5810" "5290" "5950" "5600" "5880" "5690" "584"
## [17] "53500" "5930" "56400" "5070" "5250" "5720" "591" "5760"
## [25] "53300" "5780" "5710" "5360" "5960" "558" "570" "5770"
## [33] "5120" "59800" "57400" "5300" "515" "5830" "5730" "53200"
## [41] "5940" "51900" "5550" "5650" "5800" "5750" "517" "5570"
## [49] "5160" "586" "5920" "59000" "5220" "5370" "55200" "52800"
## [57] "5670" "53400" "52100" "5060" "566" "52400" "5560" "5270"
## [65] "5790" "5080" "501" "5430" "514" "5100" "5970" "542"
## [73] "52300" "500" "55000" "5400" "538"
lessThan46<-lessThan4[startsWith(lessThan4, "6")]
ulessThan46<-unique(lessThan46)
ulessThan46
## [1] "648" "682" "626" "618" "642" "625" "607" "656" "661" "601" "694"
## [12] "681" "654" "655" "658" "604" "680" "644" "632" "669" "663" "611"
## [23] "620" "664" "608" "621" "600" "659" "614" "647" "652" "646" "623"
## [34] "617" "641" "616" "643" "622" "660" "634" "695" "686" "692" "693"
## [45] "685" "637" "627" "610" "633" "640" "619" "671" "674" "696" "691"
## [56] "653" "683" "602" "684" "645" "665" "603" "698" "605" "657" "615"
## [67] "649" "690" "670" "697" "624"
cat("\n")
print(paste("The number of unique values: ",length(ulessThan46)))
## [1] "The number of unique values: 71"
No Match: 645,690,624
Matched: 632,683,605
Add 00 ata the end of the code: 648,618,642,656,661,681,654,655,658,644,669,663,664,600,659,647,652,646,641,643,660,634,686,637,633,640,674,653,665,657,649,670
Add 0 at the end to rest of codes
diagNot<-as.character(c(645,690,624))
diagNochange<-as.character(c(632,683,605))
diag00<-as.character(c(648,618,642,656,661,681,654,655,658,644,669,663,664,600,659,647,652,646,641,643,660,634,686,637,633,640,674,653,665,657,649,670))
diagNot0<-c(diagNot,diag00,diagNochange)
diag0<-ulessThan46[!(ulessThan46 %in% diagNot0)]
for(i in diag0){
MyData_str[MyData_str==i]<-paste0(i,"0")
}
for(i in diag00){
MyData_str[MyData_str==i]<-paste0(i,"00")
}
nonMatchingCodes<-c(nonMatchingCodes,diagNot)
# test the codes
unique(MyData_str$diag_1[startsWith(MyData_str$diag_1, "6")])
## [1] "64800" "6820" "6260" "61800" "64200" "6250" "6070" "65600"
## [9] "66100" "6010" "6940" "68100" "65400" "65500" "65800" "6040"
## [17] "6800" "64400" "632" "66900" "66300" "6110" "6200" "66400"
## [25] "6080" "6210" "60000" "65900" "6140" "64700" "65200" "64600"
## [33] "6230" "6170" "64100" "6160" "64300" "6220" "66000" "63400"
## [41] "6950" "68600" "6920" "6930" "6850" "63700" "6270" "6100"
## [49] "63300" "64000" "6190" "6710" "67400" "6960" "6910" "65300"
## [57] "683" "6020" "6840" "645" "66500" "6030" "6980" "605"
## [65] "65700" "6150" "64900" "690"
unique(MyData_str$diag_2[startsWith(MyData_str$diag_2, "6")])
## [1] "64800" "61800" "6260" "6820" "64200" "66100" "6140" "6200"
## [9] "6210" "6170" "6250" "68100" "6270" "60000" "6070" "6110"
## [17] "6160" "6030" "6960" "65800" "6010" "6040" "64600" "6230"
## [25] "6910" "6800" "6950" "6080" "64700" "6930" "683" "6840"
## [33] "6150" "66300" "68600" "65200" "65900" "6920" "6220" "6940"
## [41] "6100" "63400" "605" "6020" "64400" "6190" "65600" "6980"
## [49] "66400" "6850" "645" "65400" "67400" "64100" "67000" "64900"
## [57] "66500"
unique(MyData_str$diag_3[startsWith(MyData_str$diag_3, "6")])
## [1] "6270" "6820" "61800" "65400" "6250" "68100" "64800" "66400"
## [9] "65800" "6080" "64400" "6160" "6010" "66000" "65300" "6020"
## [17] "6200" "66300" "64200" "6980" "6140" "6260" "605" "6170"
## [25] "6100" "60000" "65600" "6940" "66500" "64700" "6920" "6070"
## [33] "6110" "6950" "6230" "65900" "65500" "67000" "6210" "64600"
## [41] "64100" "6190" "6960" "65200" "6850" "6930" "6800" "6970"
## [49] "66100" "64300" "690" "68600" "6040" "65700" "6840" "6030"
## [57] "67400" "64900" "624" "6220" "66900" "6710"
lessThan47<-lessThan4[startsWith(lessThan4, "7")]
ulessThan47<-unique(lessThan47)
ulessThan47
## [1] "737" "786" "722" "784" "707" "715" "730" "738" "780" "721" "787"
## [12] "724" "789" "733" "711" "728" "782" "723" "799" "794" "729" "727"
## [23] "708" "756" "781" "726" "783" "751" "716" "788" "785" "714" "710"
## [34] "736" "790" "746" "719" "791" "759" "732" "747" "718" "720" "797"
## [45] "709" "796" "792" "717" "793" "703" "745" "705" "753" "725" "734"
## [56] "735" "706" "795" "731" "700" "704" "758" "701" "752" "702" "712"
## [67] "741" "713" "755" "742" "754" "748" "750" "744" "757"
cat("\n")
print(paste("The number of unique values: ",length(ulessThan47)))
## [1] "The number of unique values: 75"
No Match: (family code) 780,787,799,790
Matched: 797,725,734
Add 00 ata the end of the code: 786,707,715,730,724,789,733,711,727,716,736,746,719,718,709,741,755,744
Add 0 at the end to rest of codes
diagNot<-as.character(c(780,787,799,790 ))
diagNochange<-as.character(c(797,725,734))
diag00<-as.character(c(786,707,715,730,724,789,733,711,727,716,736,746,719,718,709,741,755,744))
diagNot0<-c(diagNot,diag00,diagNochange)
diag0<-ulessThan47[!(ulessThan47 %in% diagNot0)]
for(i in diag0){
MyData_str[MyData_str==i]<-paste0(i,"0")
}
for(i in diag00){
MyData_str[MyData_str==i]<-paste0(i,"00")
}
nonMatchingCodes<-c(nonMatchingCodes,diagNot)
# test the codes
unique(MyData_str$diag_1[startsWith(MyData_str$diag_1, "7")])
## [1] "7370" "78600" "7220" "7840" "70700" "71500" "73000" "7380"
## [9] "780" "7210" "787" "72400" "78900" "73300" "71100" "7280"
## [17] "7820" "7230" "799" "7940" "7290" "72700" "7080" "7560"
## [25] "7810" "7260" "7830" "7510" "71600" "7880" "7850" "7140"
## [33] "7100" "73600" "790" "74600" "71900" "7910" "7590" "7320"
## [41] "7470" "71800" "7200" "797" "70900" "7960" "7920" "7170"
## [49] "7930" "7030" "7450" "7050" "7530" "725" "734" "7350"
## [57] "7060" "7950" "7310" "7000" "7040"
unique(MyData_str$diag_2[startsWith(MyData_str$diag_2, "7")])
## [1] "71500" "70700" "780" "7880" "78600" "7850" "7290" "7810"
## [9] "73300" "787" "7100" "7590" "71100" "7840" "78900" "790"
## [17] "7210" "7830" "7220" "7140" "7230" "72400" "73000" "7380"
## [25] "7940" "7310" "799" "71900" "7820" "7260" "7530" "7280"
## [33] "71600" "7910" "7040" "7580" "7920" "72700" "71800" "7010"
## [41] "73600" "7050" "7520" "74600" "7450" "7370" "7020" "70900"
## [49] "7960" "7560" "725" "7170" "7950" "7120" "7060" "74100"
## [57] "7130" "75500" "7420" "7510" "7540" "734" "7470" "7930"
## [65] "7480" "797" "7500" "7030"
unique(MyData_str$diag_3[startsWith(MyData_str$diag_3, "7")])
## [1] "7140" "7810" "70700" "71500" "7940" "7840" "72400" "73000"
## [9] "78900" "7530" "78600" "71100" "7370" "7850" "7880" "7220"
## [17] "787" "780" "7820" "73300" "73600" "7030" "71600" "7290"
## [25] "799" "790" "7100" "7040" "7280" "7310" "7520" "74600"
## [33] "70900" "71800" "7060" "7260" "7910" "7210" "7930" "71900"
## [41] "734" "7560" "7920" "7830" "7050" "7120" "7230" "7010"
## [49] "7080" "7450" "72700" "7580" "7510" "7320" "7170" "7380"
## [57] "7420" "7350" "725" "74400" "7960" "74100" "7590" "7130"
## [65] "7470" "797" "7540" "75500" "7200" "7950" "7570" "7020"
## [73] "7500"
lessThan48<-lessThan4[startsWith(lessThan4, "8")]
ulessThan48<-unique(lessThan48)
ulessThan48
## [1] "845" "808" "812" "824" "807" "820" "822" "885" "823" "852" "805"
## [12] "851" "892" "803" "821" "825" "868" "826" "850" "840" "836" "813"
## [23] "844" "861" "801" "873" "860" "802" "853" "871" "848" "810" "863"
## [34] "864" "846" "878" "882" "843" "800" "831" "839" "815" "847" "827"
## [45] "891" "814" "865" "886" "816" "835" "881" "866" "893" "897" "867"
## [56] "890" "834" "879" "883" "854" "838" "880" "870" "862" "817" "842"
## [67] "806" "837" "895" "875" "804" "832" "833" "872" "869" "811" "894"
## [78] "884" "841" "877" "876"
cat("\n")
print(paste("The number of unique values: ",length(ulessThan48)))
## [1] "The number of unique values: 81"
No Match:
Matched:
Add 00 zeros at the end of the code: 845,812,807,820,823,852,805,851,803,821,868,813,961,801,853,810,864,800,831,839,815,814,865,816,835,881,866,890,834,854,838,880,842,806,804,832,833,872,811
Add 0 at the end to rest of codes
#diagNot<-as.character(c())
#diagNochange<-as.character(c())
diag00<-as.character(c(845,812,807,820,823,852,805,851,803,821,868,813,961,801,853,810,864,800,831,839,815,814,865,816,835,881,866,890,834,854,838,880,842,806,804,832,833,872,811))
diagNot0<-c(diagNot,diag00,diagNochange)
diag0<-ulessThan48[!(ulessThan48 %in% diagNot0)]
for(i in diag0){
MyData_str[MyData_str==i]<-paste0(i,"0")
}
for(i in diag00){
MyData_str[MyData_str==i]<-paste0(i,"00")
}
# test the codes
unique(MyData_str$diag_1[startsWith(MyData_str$diag_1, "8")])
## [1] "84500" "8080" "81200" "8240" "80700" "82000" "8220" "8850"
## [9] "82300" "85200" "80500" "85100" "8920" "80300" "82100" "8250"
## [17] "86800" "8260" "8500" "8400" "8360" "81300" "8440" "8610"
## [25] "80100" "8730" "8600" "8020" "85300" "8710" "8480" "81000"
## [33] "8630" "86400" "8460" "8780" "8820" "8430" "80000" "83100"
## [41] "83900" "81500" "8470" "8270" "8910" "81400" "86500" "8860"
## [49] "81600" "83500" "88100" "86600" "8930" "8970" "8670" "89000"
## [57] "83400" "8790" "8830" "85400" "83800" "88000" "8700" "8620"
## [65] "8170" "84200" "80600" "8370" "8950" "8750" "80400" "83200"
## [73] "83300"
unique(MyData_str$diag_2[startsWith(MyData_str$diag_2, "8")])
## [1] "8670" "88100" "8470" "85200" "81600" "81300" "8610" "80500"
## [9] "8730" "8500" "8600" "8910" "8250" "82100" "8080" "81200"
## [17] "86400" "80700" "82000" "8240" "87200" "81400" "8820" "8400"
## [25] "8020" "8260" "82300" "8440" "83300" "8690" "8700" "81000"
## [33] "84500" "81100" "85100" "81500" "8370" "80100" "86800" "8920"
## [41] "8460" "83200" "8830" "84200" "80000" "8360" "86500" "83100"
## [49] "80600" "8620" "86600" "8790" "8220" "8710" "8930" "8940"
## [57] "85300" "88000" "8630" "8840" "8430"
unique(MyData_str$diag_3[startsWith(MyData_str$diag_3, "8")])
## [1] "81000" "8910" "80500" "8820" "8360" "8730" "81300" "8020"
## [9] "88100" "8830" "86600" "82100" "89000" "84500" "81200" "82300"
## [17] "8670" "82000" "80700" "81600" "86500" "8400" "8250" "8600"
## [25] "8260" "8240" "8920" "85400" "83100" "80100" "81100" "8470"
## [33] "84200" "8080" "8610" "81500" "85100" "86400" "8710" "85200"
## [41] "8620" "8700" "8440" "81400" "8500" "87200" "8840" "86800"
## [49] "85300" "88000" "8930" "8630" "80000" "8220" "8790" "8480"
## [57] "83800" "8370" "8410" "8770" "8750" "83400" "8760"
lessThan49<-lessThan4[startsWith(lessThan4, "9")]
ulessThan49<-unique(lessThan49)
ulessThan49
## [1] "999" "996" "998" "997" "995" "962" "922" "958" "969" "968" "966"
## [12] "965" "972" "924" "959" "942" "933" "977" "970" "913" "987" "911"
## [23] "989" "934" "983" "992" "967" "904" "973" "939" "991" "935" "936"
## [34] "920" "964" "917" "994" "990" "986" "945" "916" "928" "906" "980"
## [45] "955" "952" "921" "982" "914" "915" "971" "988" "963" "944" "923"
## [56] "957" "976" "974" "941" "903" "919" "975" "910" "905" "909" "918"
## [67] "947" "912" "953" "907" "908" "948" "927" "951" "943" "956" "930"
cat("\n")
print(paste("The number of unique values: ",length(ulessThan49)))
## [1] "The number of unique values: 77"
No match: 959
Matched: 936,920,986
Add 00 at the end of the code: 996,998,997,969,965,924,942,989,945,928,952,944,923,941,903,948,927,943
Add 0 at the end to the rest of the codes
diagNot<-as.character(c(959))
diagNochange<-as.character(c(936,920,986))
diag00<-as.character(c(996,998,997,969,965,924,942,989,945,928,952,944,923,941,903,948,927,943))
diagNot0<-c(diagNot,diag00,diagNochange)
diag0<-ulessThan49[!(ulessThan49 %in% diagNot0)]
for(i in diag0){
MyData_str[MyData_str==i]<-paste0(i,"0")
}
for(i in diag00){
MyData_str[MyData_str==i]<-paste0(i,"00")
}
nonMatchingCodes<-c(nonMatchingCodes,diagNot)
# testing codes
unique(MyData_str$diag_1[startsWith(MyData_str$diag_1, "9")])
## [1] "9990" "99600" "99800" "99700" "9950" "9620" "9220" "9580"
## [9] "96900" "9680" "9660" "96500" "9720" "92400" "959" "94200"
## [17] "9330" "9770" "9700" "9130" "9870" "9110" "98900" "9340"
## [25] "9830" "9920" "9670" "9040" "9730" "9390" "9910" "9350"
## [33] "936" "920" "9640" "9170" "9940" "9900" "986" "94500"
## [41] "9160" "92800" "9060" "9800" "9550" "95200" "9210" "9820"
## [49] "9140" "9150" "9710" "9880" "9630" "94400" "92300" "9570"
## [57] "9760" "9740" "94100" "90300" "9190" "9750"
unique(MyData_str$diag_2[startsWith(MyData_str$diag_2, "9")])
## [1] "99800" "9990" "99600" "99700" "9100" "9580" "9220" "9330"
## [9] "96900" "920" "9110" "92400" "9050" "9210" "959" "9670"
## [17] "9090" "9180" "9470" "9740" "9120" "92300" "9340" "9770"
## [25] "9150" "9530" "9190" "94500" "9060" "9620" "9130" "96500"
## [33] "9070" "9080" "9950" "9920" "9170" "9160" "9800" "9680"
## [41] "98900" "95200" "9900" "9720" "94400" "9630" "9750" "9940"
## [49] "94800" "9870" "9550" "9910" "94200" "92700"
unique(MyData_str$diag_3[startsWith(MyData_str$diag_3, "9")])
## [1] "99600" "99800" "9990" "99700" "9180" "92400" "9050" "959"
## [9] "920" "9340" "9950" "92300" "9580" "94500" "9070" "9530"
## [17] "9120" "96500" "9210" "9190" "9080" "9510" "9620" "96900"
## [25] "9170" "94300" "9670" "9090" "9060" "9150" "9700" "9220"
## [33] "9660" "9720" "9160" "9560" "9870" "9100" "94400" "9910"
## [41] "9350" "9330" "9920" "9130" "9110" "9550" "9300" "98900"
## [49] "94800" "92800" "95200" "9800" "94200" "9710"
lessThan5<-diags[nchar(diags)==4]
lessThan5<-lessThan5[!startsWith(lessThan5, "E")]
unique(lessThan5)
## character(0)
cat("\n")
print(paste("The number of unique values: ",length(unique(lessThan5))))
## [1] "The number of unique values: 0"
Zero 4 digit numbers
lessThan6<-diags[nchar(diags)==5]
unique(lessThan6)
## [1] "250.7" "250.6" "250.4" "250.8" "250.1" "250.2" "250.3" "250.5" "250.9"
cat("\n")
print(paste("The number of unique values: ",length(unique(lessThan6))))
## [1] "The number of unique values: 9"
Add 0 to the end for all entries
diag0<-c("250.7","250.6","250.4","250.8","250.1","250.2","250.3","250.5","250.9")
for(i in diag0){
MyData_str[MyData_str==i]<-paste0(i,"0")
}
# test the codes
unique(MyData_str$diag_1[startsWith(MyData_str$diag_1, diag0)])
## [1] "250.40" "250.60" "250.32" "250.70" "250.13" "250.11" "250.80"
## [8] "250.82" "250.12" "250.81" "250.83" "250.42" "250.33" "250.41"
## [15] "250.23" "250.22" "250.43" "250.92" "250.50" "250.20" "250.10"
## [22] "250.30" "250.21"
unique(MyData_str$diag_2[startsWith(MyData_str$diag_2, diag0)])
## [1] "250.60" "250.13" "250.83" "250.41" "250.50" "250.81" "250.11"
## [8] "250.22" "250.70" "250.40" "250.51" "250.42" "250.53" "250.12"
## [15] "250.52" "250.91" "250.93" "250.10" "250.92" "250.82" "250.43"
## [22] "250.80" "250.32" "250.33" "250.20"
unique(MyData_str$diag_3[startsWith(MyData_str$diag_3, diag0)])
## [1] "250.70" "250.60" "250.43" "250.41" "250.50" "250.52" "250.82"
## [8] "250.53" "250.40" "250.81" "250.92" "250.80" "250.51" "250.83"
## [15] "250.11" "250.42" "250.91" "250.22" "250.93" "250.12" "250.13"
## [22] "250.90"
lessThan7<-diags[nchar(diags)==6]
unique(lessThan7)
## [1] "250.83" "250.11" "250.32" "250.13" "250.03" "250.02" "250.42"
## [8] "250.41" "250.22" "250.82" "250.33" "250.12" "250.81" "250.93"
## [15] "250.01" "250.31" "250.43" "250.23" "250.92" "250.53" "250.21"
## [22] "250.52" "250.51" "250.91" "365.44"
cat("\n")
print(paste("The number of unique values: ",length(unique(lessThan7))))
## [1] "The number of unique values: 25"
All are valid codes
write.csv(MyData_str,row.names=FALSE, file = "formated_diags.csv")
print("list of the codes that doesn't match with Find-A-Code:")
## [1] "list of the codes that doesn't match with Find-A-Code:"
nonMatchingCodes
## [1] "V53" "V71" "V54" "V25" "V15" "V61" "58" "187" "284" "275" "258"
## [12] "260" "243" "350" "312" "362" "323" "444" "445" "405" "484" "584"
## [23] "558" "585" "517" "645" "690" "624" "780" "787" "799" "790" "959"
Invalid_diag1<-MyData_str$diag_1[startsWith(MyData_str$diag_1, nonMatchingCodes)]
Invalid_diag2<-MyData_str$diag_2[startsWith(MyData_str$diag_2, nonMatchingCodes)]
Invalid_diag3<-MyData_str$diag_3[startsWith(MyData_str$diag_3, nonMatchingCodes)]
Invalid_diag1<-Invalid_diag1[nchar(Invalid_diag1)==3]
Invalid_diag2<-Invalid_diag2[nchar(Invalid_diag2)==3]
Invalid_diag3<-Invalid_diag3[nchar(Invalid_diag3)==3]
InvalidDiagsEntries<-c(Invalid_diag1,Invalid_diag2,Invalid_diag3)
cat("\n")
print("Print 20 entries of the invalid codes:")
## [1] "Print 20 entries of the invalid codes:"
head(InvalidDiagsEntries,20)
## [1] "780" "584" "780" "584" "584" "959" "780" "584" "780" "780" "787"
## [12] "585" "780" "799" "585" "780" "584" "584" "787" "780"
cat("\n")
print(paste( "There are a ",length(InvalidDiagsEntries)," entries that are not valid"))
## [1] "There are a 794 entries that are not valid"
library(magrittr)
readLines("formated_diags.csv") %>%
paste0(collapse="\n") %>%
openssl::base64_encode() -> encoded
NOTE:
When it prompt to download GIVE THE DOCUMENT A NAME with .csv as file extension